import numpy
# Import Matplotlib
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
import cn.zifangsky.kNN.classifyPerson as classifyPerson
import cn.zifangsky.kNN.kNN as kNN

# Font properties loaded from the SimSun font file (Windows-specific path), size 12.
# NOTE(review): not referenced elsewhere in the visible code; presumably meant for
# rendering Chinese text in plots -- confirm before removing.
font_set = font_manager.FontProperties(fname=r"c:/windows/fonts/simsun.ttc", size=12)

# Test loading the data file (runs at import time; the path is machine-specific).
datingDataMat,datingLabels = classifyPerson.file2matrix('C:/Users/zifangsky/Desktop/《机器学习实战》源码/Ch02/datingTestSet2.txt')
# print(datingDataMat)
# print(datingLabels)
# Test displaying the chart
classifyPerson.showInMatplotlib(datingDataMat, datingLabels)

# Test normalizing the feature values
# normDataSet, ranges, minValue = classifyPerson.autoNorm(datingDataMat)
# print(normDataSet)
# print(ranges)
# print(minValue)

def datingClassTest(
    testRatio: float = 0.10,
    k: int = 3,
    dataFile: str = 'C:/Users/zifangsky/Desktop/《机器学习实战》源码/Ch02/datingTestSet2.txt',
) -> float:
    """Measure the k-NN classifier's error rate on the dating-site data set.

    The samples are loaded with ``classifyPerson.file2matrix`` and normalized
    with ``classifyPerson.autoNorm``. The first ``testRatio`` share of the
    rows is held out as the test set; the remaining rows (and their labels)
    are handed to ``kNN.classify0`` as the training set. The resulting error
    rate is printed and also returned.

    Args:
        testRatio: Fraction of the samples held out for testing.
        k: Number of neighbours passed to ``kNN.classify0``.
        dataFile: Path of the data file passed to
            ``classifyPerson.file2matrix``.

    Returns:
        The fraction of test samples whose predicted label differs from the
        label recorded in the data file.

    Raises:
        ValueError: If the split leaves no test samples (which previously
            surfaced as a ZeroDivisionError) or no training samples.
    """
    # Load the samples and normalize the feature values; only the normalized
    # matrix is needed here, the ranges/minimums are discarded.
    datingDataMat, datingLabels = classifyPerson.file2matrix(dataFile)
    normDataSet, _ranges, _minValue = classifyPerson.autoNorm(datingDataMat)

    # Total number of samples and the number held out for testing.
    totalNum = normDataSet.shape[0]
    testNum = int(totalNum * testRatio)
    if testNum <= 0 or testNum >= totalNum:
        raise ValueError(
            'testRatio=%r leaves %d test sample(s) out of %d; both the test '
            'set and the training set must be non-empty'
            % (testRatio, testNum, totalNum))

    # The training slices do not depend on the loop index, so build them once.
    # NOTE(review): assumes kNN.classify0 does not mutate its arguments.
    trainSet = normDataSet[testNum:totalNum]
    trainLabels = datingLabels[testNum:totalNum]

    # Number of wrong predictions.
    errorCount = 0
    for i in range(testNum):
        # Predict with the k-nearest-neighbours classifier.
        classifierResult = kNN.classify0(normDataSet[i, :], trainSet, trainLabels, k)
        # print('预测值：%d ---- 实际值：%d'%(classifierResult,datingLabels[i]))

        if classifierResult != datingLabels[i]:
            errorCount += 1

    errorRate = errorCount / float(testNum)
    print('算法的错误率是：%f' % errorRate)
    return errorRate


# datingClassTest()
# classifyPerson.classifyPerson()
